/*Code for replication analysis and extensions.

Reference:
	Graddy-Reed, A., L. Lanahan, and N. M. V. Ross (2018), 
	'The Effect of R&D Investment on Graduate Student Productivity: Evidence from the Life Sciences,'  
	Journal of Policy Analysis and Management, 37(4): 809-834. 

For questions about the code:
	A. Graddy-Reed: graddyre@usc.edu (first author of stata code)
	L. Lanahan: llanahan@uoregon.edu

Code is organized as follows: 
	Part 1: Descriptive Statistics -- Wide-Form Data (begin line: 32)
	Part 2: Wide-Form Data Analysis -- Additional Outcomes & Robustness Checks (begin line: 136)
	Part 3: Long-Form Data Analysis - Primary Model & Outcome (begin line: 174)
	Part 4: Long-Form Data Analysis - Robustness & Sensitivity Tests (begin line: 350)
	Part 5: Long-Form Data Analysis - Additional Outcomes (begin line: 557)
	
Stata version: Stata 14 or higher

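Note: Need to install the user-written Stata commands vce2way & powerreg; the code also calls outreg2 and eststo/estpost/esttab (estout package), which must be installed as well.  */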
********************************************************************************
*** Pull in Wide Form Data (set global dir below to the folder holding the .dta files; all output files are written to that folder too)
clear all
set more off
estimates clear
capture log close
macro drop all
global dir ** TO SET MANUALLY **
use "${dir}/JPAM_Wide Form_dataverse.dta"
********************************************************************************
**** Part 1: Descriptive Statistics ****
********************************************************************************
{
** General Information: one-way frequency tables
tabulate pi_appear_twice
	* 23 individuals applied twice (only most-recent instance kept)
tabulate foc_confirmed
	* 686 confirmed first placement (191 not confirmed)
tabulate year_degree
tabulate degree_gap

** Table A2: Comparison of Sub-Fields to Full Sample
	* Run from Pilot I Sample Selection Do File

** Table A4: Sample Distribution by Year (overall, then crossed with award and field)
tabulate year_prop
tabulate year_prop award
tabulate year_prop field

** Table 1: Summary Statistics for Publishing Sample

* Publishing-sample flag: 1 when tot_pub_5 < 5 and every variable listed below is non-missing, 0 otherwise
generate primesample = 0
replace primesample = 1 if tot_pub_5 < 5 & tot_pub != . & female != . & carneigie_veryhigh != . & g_d_uni_public != . & g_d_avg_pubs_per_fac != . & g_d_ProgramSize_Q != . & g_d_R3_mid != . & g_d_grfp_total2yr != . & g_df_fed_HE_RD_ln != .
* (rows failing the condition keep the initial 0, so the flag is never missing)

* FA share: FA_tot_pub_16 divided by tot_pub (missing when tot_pub is 0 or missing)
generate FA_share = FA_tot_pub_16 / tot_pub

* Variable lists reused by the summary tables below
global publications tot_pub first_pub_grfp_ref any_prior_pub any_FA_pub FA_first_pub_grfp_ref FA_share
global pubrank top01_tot_pub_max top05_tot_pub_max top10_tot_pub_max
global controls female field_*
global outcomes complete_phd time foc_academic research postdoc
global insttraits g_d_avg_pubs_per_fac g_df_fed_HE_RD g_d_grfp_total2yr toprank carneigie_veryhigh g_d_uni_public

* Prime Sample Statistics: summary table for the publishing sample, written to Descriptive_Stats.csv, then the same variables split by award
#delimit ;
eststo clear;
eststo: estpost summarize $publications $pubrank $controls $outcomes $insttraits if primesample == 1 ;
	esttab using "$dir/Descriptive_Stats.csv", label title(Descriptive Statistics by Award Type)
	mtitle ("Full Sample") 
	cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3)) min(fmt(3)) max(fmt(3))") replace plain ;

* Stratified by Award Type (this comment must end with the delimiter, otherwise it swallows the next command) ;
eststo clear;
bysort award: eststo: estpost summarize $publications $pubrank $controls $outcomes $insttraits if primesample == 1 ;
	esttab using "$dir/Descriptive_Stats.csv", label title(Descriptive Statistics by Award Type)
	cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3))") append plain ;
#delimit cr

* Additional Summary Statistics: stored estimates are cleared first so Add_DescStats.csv holds only the g_landgrnt/g_hbcu results, not tables stored earlier
#delimit ;
eststo clear;
eststo: estpost summarize g_landgrnt g_hbcu if primesample == 1 ;
	esttab using "$dir/Add_DescStats.csv", label title(Additional Descriptive Statistics by Award Type) cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3))") replace plain ;
eststo clear;
bysort award: eststo: estpost summarize g_landgrnt g_hbcu if primesample == 1 ;
	esttab using "$dir/Add_DescStats.csv", label title(Additional Descriptive Statistics by Award Type)
	cells("count(fmt(0)) mean(fmt(3)) sd(fmt(3))") append plain ;
#delimit cr
tabulate g_d_ProgramSize_Q award  if primesample == 1, col chi2

log using "$dir/Ttests.log", replace

* Statistical Differences: describe each variable, then a two-sample t test by award within the publishing sample
foreach v in tot_pub first_pub_grfp_ref any_prior_pub any_FA_pub FA_first_pub_grfp_ref FA_share {
	describe `v'
	ttest `v' if primesample == 1, by(award)
}
foreach v in top01_tot_pub_max top05_tot_pub_max top10_tot_pub_max {
	describe `v'
	ttest `v' if primesample == 1, by(award)
}
foreach v in female field_1 field_2 field_3 {
	describe `v'
	ttest `v' if primesample == 1, by(award)
}
foreach v in complete_phd time foc_academic research postdoc {
	describe `v'
	ttest `v' if primesample == 1, by(award)
}
foreach v in g_d_avg_pubs_per_fac g_df_fed_HE_RD g_d_grfp_total2yr toprank carneigie_veryhigh g_d_uni_public {
	describe `v'
	ttest `v' if primesample == 1, by(award)
}
tabulate g_d_ProgramSize_Q award if primesample == 1, col chi2

pwcorr award $insttraits

* Power Test of Means (one-sample mean test; first value is the null mean, second the alternative)
	* Awardee Average Publication Count: 14.5; SD 13; Total Sample Size 562; Average HM 12.4
power onemean 14.5 12.4, power(0.9) sd(13)
	* Minimum sample size is: 405
power onemean 14.5 12.4, power(0.8) sd(13)
	* Minimum sample size is: 303
power onemean 14.5 12.4, n(562) sd(13)
	* Power with the sample size of 562 supplied above is: 0.9688 (earlier note said 550 - TODO confirm)
power onemean 14.5, power(0.9) n(562) sd(13)
	* Estimated alternate mean: 16.28

log close
}
********************************************************************************
**** Part 2: Wide-Form Data Analysis - Additional Outcomes & Robustness Checks ****
********************************************************************************
{
*** Additional Outcomes (each model is fit quietly, converted to marginal effects, and exported as one column of WideAltOutcomes.xls)
	* 731 individuals completed a PhD, 77 completed another type of degree, 69 are undetermined

** Degree Completion
* PhD Completion: Logit (first export uses replace, so it creates/overwrites the workbook)
quietly logit complete_phd award female any_prior_pub_zero toprank carneigie_veryhigh g_d_uni_public i.field i.year_prop 
margins, dydx(*) post
outreg2 using "$dir/WideAltOutcomes.xls", replace dec(3) e(ll r2 rmse) label ctitle(Completed PhD, ME) addtext(Sample, Full, Model, Logit, Year Applied FE, Yes, Field Controls, Yes)

* If Completed PhD, Time to Degree: Negative Binomial (estimation restricted to complete_phd == 1)
quietly nbreg time award female any_prior_pub_zero toprank carneigie_veryhigh g_d_uni_public i.field i.year_prop if complete_phd == 1
margins, dydx(*) post
outreg2 using "$dir/WideAltOutcomes.xls", append dec(3) e(ll r2 rmse) label ctitle(Time to PhD, ME) addtext(Sample, Completed PhD, Model, Negative Binomial, Year Applied FE, Yes, Field Controls, Yes)

** First Placement (these two models add time as a covariate)
* Post-Doctoral Researcher: Logit (outcome position_5)
quietly logit position_5 award female any_prior_pub_zero toprank carneigie_veryhigh g_d_uni_public i.field i.year_prop time
margins, dydx(*) post
outreg2 using "$dir/WideAltOutcomes.xls", append dec(3) e(ll r2 rmse) label ctitle(Post-Doc Position, ME) addtext(Sample, Full, Model, Logit, Year Applied FE, Yes, Field Controls, Yes)

* Tenure-Track Faculty: Logit (outcome position_8)
quietly logit position_8 award female any_prior_pub_zero toprank carneigie_veryhigh g_d_uni_public i.field i.year_prop time
margins, dydx(*) post
outreg2 using "$dir/WideAltOutcomes.xls", append dec(3) e(ll r2 rmse) label ctitle(TT Position, ME) addtext(Sample, Full, Model, Logit, Year Applied FE, Yes, Field Controls, Yes)
}

****************************************************************************************************************************************************************
*** Pull in Long Form Data (global dir, set at the top of the file, must still be defined: the path below uses it)
clear all
set more off
estimates clear
capture log close
macro drop all
use "${dir}/JPAM_Long Form_dataverse.dta", clear
********************************************************************************
**** Part 3: Long-Form Data Analysis - Primary Model & Outcome ****
********************************************************************************
{
*** Regression Power Analysis (powerreg is a user-written command and must be installed; r2f/r2r are the full- and reduced-model R-squared values passed to it)
	* Baseline Model: reg tot_pub $dd $pi $dept $uni $fe if never_pub == 0, cluster(pi_id)
	* R-squared = 0.28, N = 8,430, C = 562, # of Vars = 26
	* Test high R-squared with various effect sizes

** High R-Squared with Addition of Treatment Interaction Term 
powerreg, r2f(0.30) r2r(0.25) nvar(26) ntest(1) power(.9)	
	* Minimum Sample Size to Find Effect: 147
powerreg, r2f(0.30) r2r(0.25) nvar(26) ntest(1) alpha(.025) power(.9)	
	* Minimum Sample Size to Find Effect: 175
powerreg, r2f(0.30) r2r(0.275) nvar(26) ntest(1) alpha(.025) power(.9)	
	* Minimum Sample Size to Find Effect: 350
powerreg, r2f(0.30) r2r(0.28) nvar(26) ntest(1) alpha(.025) power(.9)	
	* Minimum Sample Size to Find Effect: 434
powerreg, r2f(0.30) r2r(0.285) nvar(26) ntest(1) alpha(.025) power(.9)	
	* Minimum Sample Size to Find Effect: 588
powerreg, r2f(0.30) r2r(0.29) nvar(26) ntest(1) alpha(.025) power(.9)	
	* Minimum Sample Size to Find Effect: 868

** Use actual R-squareds from baseline model with addition of single treatment variable
powerreg, r2f(0.2760) r2r(0.2749) nvar(26) ntest(1) power(.9)	
	* Minimum Sample Size to Find Effect: 6944
powerreg, r2f(0.2760) r2r(0.2749) nvar(26) ntest(1) power(.8)	
	* Minimum Sample Size to Find Effect: 5152
powerreg, r2f(0.2760) r2r(0.2749) nvar(25) ntest(1) n(8430)
	* Power Level: 0.9472 (NOTE(review): nvar(25) here and below versus nvar(26) above - confirm which is intended)
powerreg, r2f(0.2760) r2r(0.2749) nvar(25) ntest(1) n(6150)
	* Power Level: 0.8635
powerreg, r2f(0.2760) r2r(0.2749) nvar(25) ntest(1) n(2280)
	* Power Level: 0.4604
		* Raises questions on power of prior-pub sub-sample; really want at least 80% power...
		
		
*** Primary Model: Logged Publications DD Stratified by Prior Publication Status
** Setup: covariate lists shared by the models below (alt drops priorpubs_rest; altdept drops toprank)
global dd treatment award prepost 
global pi female priorpubs_rest
global alt female
global dept g_d_avg_pubs_per_fac i.g_d_ProgramSize_Q toprank g_d_grfp_total2yr g_df_fed_HE_RD_ln
global altdept g_d_avg_pubs_per_fac i.g_d_ProgramSize_Q g_d_grfp_total2yr g_df_fed_HE_RD_ln
global uni carneigie_veryhigh g_d_uni_public
global fe i.field i.year_prop

** CCA Sample Defined: flag is 1 when every variable listed is non-missing, 0 otherwise
generate primesample = 0
replace primesample = 1 if priorpubs_rest != . & tot_pub != . & treatment != . & female != . & carneigie_veryhigh != . & g_d_uni_public != . & g_d_avg_pubs_per_fac != . & g_d_ProgramSize_Q != . & g_d_R3_mid != . & g_d_grfp_total2yr != . & g_df_fed_HE_RD_ln != .
* (rows failing the condition keep the initial 0, so the flag is never missing)

log using "$dir/AddPubStats.log", replace

** Additional Descriptive Statistics (overall, by award, and t test by award for each measure)
sort award
summarize pp_rest_wide if never_pub == 0 & primesample == 1
by award: summarize pp_rest_wide if never_pub == 0 & primesample == 1
ttest pp_rest_wide if never_pub == 0 & primesample == 1, by(award)
summarize prior_pubs if priorpubs_rest == 1 & primesample == 1
by award: summarize prior_pubs if priorpubs_rest == 1 & primesample == 1
ttest prior_pubs if priorpubs_rest == 1 & primesample == 1, by(award)
summarize FA_tot_pub if year == 16 & FA_tot_pub > 0 & primesample == 1
by award: summarize FA_tot_pub if year == 16 & FA_tot_pub > 0 & primesample == 1
ttest FA_tot_pub if year == 16 & FA_tot_pub > 0 & primesample == 1, by(award)

** Pre-Trend Diagnostics (years 4 and 5 are the last two pre-award periods on the graphs' axis labelling)
ttest tot_pub if year == 5 & primesample == 1, by(award)
	* Diff: 0.09* (A>HM)
ttest tot_pub if year == 4 & primesample == 1, by(award)
	* Diff: 0.03 (NS) (A>HM)
ttest tot_pub if year == 5 & primesample == 1 & priorpubs_rest == 1, by(award)
	* Diff: 0.06 (NS) (A>HM)
ttest tot_pub if year == 4 & primesample == 1 & priorpubs_rest == 1, by(award)
	* Diff: 0.08 (NS) (HM>A)
ksmirnov tot_pub if year == 5 & primesample == 1, by(award)
	* Combo: 0.05 (NS)
ksmirnov tot_pub if year == 4 & primesample == 1, by(award)
	* Combo: 0.04 (NS)
ksmirnov tot_pub if year == 5 & primesample == 1 & priorpubs_rest == 1, by(award)
	* Combo: 0.09 (NS)
ksmirnov tot_pub if year == 4 & primesample == 1 & priorpubs_rest == 1, by(award)
	* Combo: 0.05 (NS)
		* Kolmogorov-Smirnov test for equality of distribution functions fails to reject the null hypothesis of equivalent distributions
		
** Sample Comparison: 0 = applied in 1995 or 1996, 1 = applied after 1996, missing otherwise
generate broaderimpacts = 0 if year_prop == 1995 | year_prop == 1996
replace broaderimpacts = 1 if year_prop > 1996 & year_prop != .

ttest tot_pub if broaderimpacts == 0, by(award)
ttest tot_pub if broaderimpacts == 1, by(award)
ttest tot_pub, by(broaderimpacts)
ttest tot_pub if award == 0, by(broaderimpacts)
ttest tot_pub if award == 1, by(broaderimpacts)

ttest tot_pub if broaderimpacts == 0 & year == 5, by(award)
ttest tot_pub if broaderimpacts == 1 & year == 5, by(award)
ttest tot_pub if year == 5, by(broaderimpacts)
ttest tot_pub if award == 0 & year == 5, by(broaderimpacts)
ttest tot_pub if award == 1 & year == 5, by(broaderimpacts)
	
log close
	
*** Pre-Trend DD Diagnostic: Graphs	
** Annual Average Publication Count by Awardee/Honorable Mention (each series is the within-year mean of tot_pub for one group)
sort year
* Publishing Sample
egen actpub_a = mean(tot_pub) if award == 1 & never_pub == 0 & primesample == 1, by(year)
egen actpub_hm = mean(tot_pub) if award == 0 & never_pub == 0 & primesample == 1, by(year)
twoway  (scatter actpub_a year, mcolor(black) xline(6, lpattern(longdash) lcolor(gs13))) ///
		(scatter actpub_hm year, mcolor(gs10) msymbol(S)), ///
		yscale(range(0 10)) ylabel(0(2)10) ytitle("Number of Publications") xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white)) ///
		legend(label(1 "Awardees") label(2 "Honorable Mentions")) legend(subtitle("Panel A: If Ever Publish"))
graph save Graph "${dir}/Graph_Pubs_Active.gph", replace
	*twoway (scatter actpub_a year, mcolor(black) xline(6)) (scatter actpub_hm year, mcolor(gold))(qfit actpub_a year if year<6, mcolor(black)) (qfit actpub_hm year if year<6, mcolor(gold))(qfit actpub_a year if year>6 & year<17, mcolor(black)) (qfit actpub_hm year if year>6 & year<17, mcolor(gold)) 

* Prior Publications Sample (Inherently Publishing)
egen priorpub_a = mean(tot_pub) if award == 1 & priorpubs_rest == 1 & primesample == 1, by(year)
egen priorpub_hm = mean(tot_pub) if award == 0 & priorpubs_rest == 1 & primesample == 1, by(year)
twoway  (scatter priorpub_a year, mcolor(black) xline(6, lpattern(longdash) lcolor(gs13))) ///
		(scatter priorpub_hm year, mcolor(gs10) msymbol(S)), ///
		yscale(range(0 10)) ylabel(0(2)10) ytitle("Number of Publications") xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white)) ///
		legend(label(1 "Awardees") label(2 "Honorable Mentions")) legend(subtitle("Panel B: If Have Prior Publications"))
graph save Graph "${dir}/Graph_Pubs_Prior.gph", replace

* Publishing with No Prior Pubs Sample
egen nppe_a = mean(tot_pub) if award == 1 & priorpubs_rest == 0 & never_pub == 0 & primesample == 1, by(year)
egen nppe_hm = mean(tot_pub) if award == 0 & priorpubs_rest == 0 & never_pub == 0 & primesample == 1, by(year)
twoway  (scatter nppe_a year, mcolor(black) xline(6, lpattern(longdash) lcolor(gs13))) ///
		(scatter nppe_hm year, mcolor(gs10) msymbol(S)), ///
		yscale(range(0 10)) ylabel(0(2)10) ytitle("Number of Publications") xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white)) ///
		legend(label(1 "Awardees") label(2 "Honorable Mentions")) legend(subtitle("Panel C: If Have No Prior Publications"))
graph save Graph "${dir}/Graph_Pubs_PubsNoPrior.gph", replace

** Fully Specified & Stratified by Prior Publications for Ever Publish Sample Level Form (each model is exported twice: with confidence intervals, then without)
regress tot_pub 
outreg2 using "$dir/Primary_tot_pub.xls", replace dec(3) e(ll r2_a rmse) ctitle(Test Constant)
foreach outcome in tot_pub {
	quietly regress `outcome' $dd $pi $dept $uni $fe if never_pub == 0, vce(cluster pi_id)
	outreg2 using "$dir/Primary_`outcome'.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Full Sample, `outcome') label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses)
	outreg2 using "$dir/Primary_`outcome'.xls", append dec(3) e(ll r2_a rmse) ctitle(Full Sample, `outcome') label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses)
	quietly regress `outcome' $dd $alt $dept $uni $fe if priorpubs_rest == 1, vce(cluster pi_id)
	outreg2 using "$dir/Primary_`outcome'.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Prior Publications, `outcome') label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes)
	outreg2 using "$dir/Primary_`outcome'.xls", append dec(3) e(ll r2_a rmse) ctitle(Prior Publications, `outcome') label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes)
	quietly regress `outcome' $dd $alt $dept $uni $fe if priorpubs_rest == 0 & never_pub == 0, vce(cluster pi_id)
	outreg2 using "$dir/Primary_`outcome'.xls", append dec(3) ci e(ll r2_a rmse) ctitle(No Prior Activity, `outcome') label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes)
	outreg2 using "$dir/Primary_`outcome'.xls", append dec(3) e(ll r2_a rmse) ctitle(No Prior Activity, `outcome') label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes)
}

** Post-Trend DD Diagnostic: DD with Discontinuity 
* Triple Difference - Checks Slopes Between A & HM Pre & Post (interaction terms are built by hand from award, prepost, treatment and year)
generate awardyear = award * year
generate yearpost = year * prepost
generate triple = treatment * year
* Total Publications: full publishing sample, prior-publication sample, then publishing sample without prior publications
regress tot_pub triple award year prepost awardyear treatment yearpost $pi $dept $uni $fe if never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Primary_DDD.xls", replace dec(3) e(ll r2_a rmse) ctitle(Full Sample, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Controls, Yes)
outreg2 using "$dir/Primary_DDD.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Full Sample, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Controls, Yes)
regress tot_pub triple award year prepost awardyear treatment yearpost $alt $dept $uni $fe if priorpubs_rest == 1, vce(cluster pi_id)
outreg2 using "$dir/Primary_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(Prior Publications, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Controls, Yes)
outreg2 using "$dir/Primary_DDD.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Prior Publications, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Controls, Yes)
regress tot_pub triple award year prepost awardyear treatment yearpost $alt $dept $uni $fe if priorpubs_rest == 0 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Primary_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(No Prior Activity, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Controls, Yes)
outreg2 using "$dir/Primary_DDD.xls", append dec(3) ci e(ll r2_a rmse) ctitle(No Prior Activity, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Controls, Yes)

** Multicollinearity VIF Check
* Primary Model & Outcome: refit each of the three primary specifications quietly and log its variance inflation factors
log using "$dir/VIF.log", replace
quietly regress tot_pub $dd $pi $dept $uni $fe if never_pub == 0, vce(cluster pi_id)
estat vif
quietly regress tot_pub $dd $alt $dept $uni $fe if priorpubs_rest == 1, vce(cluster pi_id)
estat vif
quietly regress tot_pub $dd $alt $dept $uni $fe if priorpubs_rest == 0 & never_pub == 0, vce(cluster pi_id)
estat vif
log close
}

********************************************************************************
**** Part 4: Long-Form Data Analysis - Robustness & Sensitivity Tests ****
********************************************************************************
{
*** Sensitivity Tests (DD columns go to Sensitivity.xls, the matching DDD checks go to Sensitive_DDD.xls)
** Primary Model Estimated on Full Sample (no never_pub restriction)
quietly regress tot_pub $dd $pi $dept $uni $fe, vce(cluster pi_id)
outreg2 using "$dir/Sensitivity.xls", replace dec(3) ci e(ll r2_a rmse) ctitle(Expanded Sample, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS, OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses)
outreg2 using "$dir/Sensitivity.xls", append dec(3) e(ll r2_a rmse) ctitle(Expanded Sample, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS, OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses)
regress tot_pub triple award year prepost awardyear treatment yearpost $pi $dept $uni $fe, vce(cluster pi_id)
outreg2 using "$dir/Sensitive_DDD.xls", replace dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, Total Pubs) label addtext(Cluster, Yes, Sample, Full, Controls, Yes)

** Primary Model Estimated on Post-Doc Sample (publishing sample with postdoc == 1)
quietly regress tot_pub $dd $pi $dept $uni $fe if never_pub == 0 & postdoc == 1, vce(cluster pi_id)
outreg2 using "$dir/Sensitivity.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Post-Doc Publish, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Sensitivity.xls", append dec(3) e(ll r2_a rmse) ctitle(Post-Doc Publish, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
regress tot_pub triple award year prepost awardyear treatment yearpost $pi $dept $uni $fe if never_pub == 0 & postdoc == 1, vce(cluster pi_id)
outreg2 using "$dir/Sensitive_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, Total Pubs) label addtext(Cluster, Yes, Sample, Post-Doc Publish, Controls, Yes)

** Primary Model Stratified by Department Rank (altdept is used because toprank defines the strata)
quietly regress tot_pub $dd $pi $altdept $uni $fe if toprank == 1 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitivity.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Publish Rank 1, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Sensitivity.xls", append dec(3) e(ll r2_a rmse) ctitle(Publish Rank 1, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
quietly regress tot_pub $dd $pi $altdept $uni $fe if toprank == 0 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitivity.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Publish Rank 2 & 3, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Sensitivity.xls", append dec(3) e(ll r2_a rmse) ctitle(Publish Rank 2 & 3, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
regress tot_pub triple award year prepost awardyear treatment yearpost $pi $altdept $uni $fe if toprank == 1 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitive_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, Total Pubs) label addtext(Cluster, Yes, Sample, Publish Rank 1, Controls, Yes)
regress tot_pub triple award year prepost awardyear treatment yearpost $pi $altdept $uni $fe if toprank == 0 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitive_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, Total Pubs) label addtext(Cluster, Yes, Sample, Publish Rank 2 & 3, Controls, Yes)

*** Primary Model Estimated with Varying Timeframes 
** Traditional DD - Single Pre & Post Periods (year 5 is the pre period in both variants)
* Five Years Post: year 5 versus year 11
quietly regress tot_pub $dd $pi $dept $uni $fe if (year == 5 | year == 11) & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitivity.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Publish Years -1 & 5, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Sensitivity.xls", append dec(3) e(ll r2_a rmse) ctitle(Publish Years -1 & 5, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
* Ten Years Post: year 5 versus year 16
quietly regress tot_pub $dd $pi $dept $uni $fe if (year == 5 | year == 16) & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitivity.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Publish Years -1 & 10, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Sensitivity.xls", append dec(3) e(ll r2_a rmse) ctitle(Publish Years -1 & 10, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)

** Abbreviated Timeframe
* Cutting off first and last three years (keeps years 4 through 13)
quietly regress tot_pub $dd $pi $dept $uni $fe if year > 3 & year < 14 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitivity.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Publish Years -2 to 7, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Sensitivity.xls", append dec(3) e(ll r2_a rmse) ctitle(Publish Years -2 to 7, Total Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
regress tot_pub triple award year prepost awardyear treatment yearpost $pi $dept $uni $fe if year > 3 & year < 14 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Sensitive_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, Total Pubs) label addtext(Cluster, Yes, Sample, Publish Year 4 to 13, Controls, Yes)

* Corresponding Annual Average Publication Graphs (re-plots actpub_a / actpub_hm, created earlier in this file, restricted to the years used in each timeframe model)
sort year
twoway  (scatter actpub_a year, mcolor(black) xline(6, lpattern(longdash) lcolor(gs13))) ///
		(scatter actpub_hm year, mcolor(gs10) msymbol(S)) if year == 5 | year == 11, /// the if qualifier restricts both scatter plots
		yscale(range(0 10)) ylabel(0(2)10) ytitle("Number of Publications") xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white)) ///
		legend(label(1 "Awardees") label(2 "Honorable Mentions")) legend(subtitle("Panel A: Two-Period DD Five Years-Post GRFP"))
graph save Graph "${dir}/Graph_511_Pubs_Active.gph", replace

twoway  (scatter actpub_a year, mcolor(black) xline(6, lpattern(longdash) lcolor(gs13))) ///
		(scatter actpub_hm year, mcolor(gs10) msymbol(S)) if year == 5 | year == 16, /// the if qualifier restricts both scatter plots
		yscale(range(0 10)) ylabel(0(2)10) ytitle("Number of Publications") xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white)) ///
		legend(label(1 "Awardees") label(2 "Honorable Mentions")) legend(subtitle("Panel B: Two-Period DD Ten Years-Post GRFP"))
graph save Graph "${dir}/Graph_516_Pubs_Active.gph", replace

twoway  (scatter actpub_a year, mcolor(black) xline(6, lpattern(longdash) lcolor(gs13))) ///
		(scatter actpub_hm year, mcolor(gs10) msymbol(S)) if year > 3 & year < 14, /// the if qualifier restricts both scatter plots
		yscale(range(0 10)) ylabel(0(2)10) ytitle("Number of Publications") xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white)) ///
		legend(label(1 "Awardees") label(2 "Honorable Mentions")) legend(subtitle("Panel C: Linear Range From Year -2 To 7"))
graph save Graph "${dir}/Graph_413_Pubs_Active.gph", replace

*** Robustness Checks (DD columns go to Robustness.xls, DDD checks go to Robust_DDD.xls)
** Primary Model Estimated with OLS & Log+1 FF for Ever Publish Sample (outcome ln2_totpub)
quietly regress ln2_totpub $dd $pi $dept $uni $fe if never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robustness.xls", replace dec(3) ci e(ll r2_a rmse) ctitle(Full Sample, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS, OLS Long Form DD; Outcome: LN Plus 1 Total Publications; Clustered standard errors in parentheses)
outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ctitle(Full Sample, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS, OLS Long Form DD; Outcome: LN Plus 1 Total Publications; Clustered standard errors in parentheses)
quietly regress ln2_totpub $dd $alt $dept $uni $fe if priorpubs_rest == 1, vce(cluster pi_id)
outreg2 using "$dir/Robustness.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Prior Publications, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ctitle(Prior Publications, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
quietly regress ln2_totpub $dd $alt $dept $uni $fe if priorpubs_rest == 0 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robustness.xls", append dec(3) ci e(ll r2_a rmse) ctitle(No Prior Activity, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ctitle(No Prior Activity, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
* DD with Discontinuity Check (same three samples, triple-difference specification)
regress ln2_totpub triple award year prepost awardyear treatment yearpost $pi $dept $uni $fe if never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robust_DDD.xls", replace dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, LN Pubs) label addtext(Cluster, Yes, Sample, Full Sample, Controls, Yes)
regress ln2_totpub triple award year prepost awardyear treatment yearpost $alt $dept $uni $fe if priorpubs_rest == 1, vce(cluster pi_id)
outreg2 using "$dir/Robust_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, LN Pubs) label addtext(Cluster, Yes, Sample, Prior Pubs, Controls, Yes)
regress ln2_totpub triple award year prepost awardyear treatment yearpost $alt $dept $uni $fe if priorpubs_rest == 0 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robust_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, LN Pubs) label addtext(Cluster, Yes, Sample, Pubs No Prior, Controls, Yes)

** Primary Model Estimated with OLS & Log+0.1 FF for Ever Publish Sample (outcome ln1_totpub; columns are appended to the files started by the Log+1 models)
quietly regress ln1_totpub $dd $pi $dept $uni $fe if never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robustness.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Full Sample, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS, OLS Long Form DD; Outcome: LN Plus 0.1 Total Publications; Clustered standard errors in parentheses)
outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ctitle(Full Sample, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS, OLS Long Form DD; Outcome: LN Plus 0.1 Total Publications; Clustered standard errors in parentheses)
quietly regress ln1_totpub $dd $alt $dept $uni $fe if priorpubs_rest == 1, vce(cluster pi_id)
outreg2 using "$dir/Robustness.xls", append dec(3) ci e(ll r2_a rmse) ctitle(Prior Publications, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ctitle(Prior Publications, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
quietly regress ln1_totpub $dd $alt $dept $uni $fe if priorpubs_rest == 0 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robustness.xls", append dec(3) ci e(ll r2_a rmse) ctitle(No Prior Activity, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ctitle(No Prior Activity, LN Pubs) label addtext(Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS)
* DD with Discontinuity Check (same three samples, triple-difference specification)
regress ln1_totpub triple award year prepost awardyear treatment yearpost $pi $dept $uni $fe if never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robust_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, LN Pubs) label addtext(Cluster, Yes, Sample, Full Sample, Controls, Yes)
regress ln1_totpub triple award year prepost awardyear treatment yearpost $alt $dept $uni $fe if priorpubs_rest == 1, vce(cluster pi_id)
outreg2 using "$dir/Robust_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, LN Pubs) label addtext(Cluster, Yes, Sample, Prior Pubs, Controls, Yes)
regress ln1_totpub triple award year prepost awardyear treatment yearpost $alt $dept $uni $fe if priorpubs_rest == 0 & never_pub == 0, vce(cluster pi_id)
outreg2 using "$dir/Robust_DDD.xls", append dec(3) e(ll r2_a rmse) ctitle(OLS Long DDD, LN Pubs) label addtext(Cluster, Yes, Sample, Pubs No Prior, Controls, Yes)

** OLS on publication counts with restricted time windows (ever-publish sample)
*  Panel year 5 is labelled -1 in the column titles, 11 is labelled 5 and
*  16 is labelled 10. Every DD fit is exported with and without the ci option.
*  NOTE: this section relies on local macros, so execute it as one unit.
local rows Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS

* Traditional DD with one pre period (year 5) and one post period:
* first five years post (year 11), then ten years post (year 16)
foreach post in 11 16 {
	* column-title label of the post period (5 or 10)
	local lead = `post' - 6
	quietly reg tot_pub $dd $pi $dept $uni $fe if inlist(year, 5, `post') & never_pub == 0, cluster(pi_id)
	outreg2 using "$dir/Robustness.xls", append dec(3) ci e(ll r2_a rmse) ///
		ctitle(Publish Years -1 & `lead', Total Pubs) label addtext(`rows')
	outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ///
		ctitle(Publish Years -1 & `lead', Total Pubs) label addtext(`rows')
}

* Abbreviated timeframe: keeps panel years 4 through 13, i.e. drops the first
* three and the last three years
local window year > 3 & year < 14 & never_pub == 0
quietly reg tot_pub $dd $pi $dept $uni $fe if `window', cluster(pi_id)
outreg2 using "$dir/Robustness.xls", append dec(3) ci e(ll r2_a rmse) ///
	ctitle(Publish Years -2 to 7, Total Pubs) label addtext(`rows')
outreg2 using "$dir/Robustness.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Publish Years -2 to 7, Total Pubs) label addtext(`rows')

* Triple-difference version on the same abbreviated window
reg tot_pub triple award year prepost awardyear treatment yearpost $pi $dept $uni $fe if `window', cluster(pi_id)
outreg2 using "$dir/Robust_DDD.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(OLS Long DDD, Total Pubs) label addtext(Cluster, Yes, Sample, Publish Year 4 to 13, Controls, Yes)

** Primary model as a Poisson regression on publication counts
*  After each fit, margins evaluates exp(xb) - exp(xb - _b[treatment]) with
*  award, prepost and treatment all set to 1.
*  NOTE: this section relies on a local macro, so execute it as one unit.
local delta exp(predict(xb)) - exp(predict(xb)-_b[treatment])

* Ever-publish sample
quietly poisson tot_pub $dd $pi $dept $uni $fe if never_pub == 0, cluster(pi_id)
margins, expression(`delta') at(award=1 prepost=1 treatment=1)
	* Margin: .2344756 with P-value: 0.782 

* Researchers with prior publications
quietly poisson tot_pub $dd $alt $dept $uni $fe if priorpubs_rest == 1, cluster(pi_id)
margins, expression(`delta') at(award=1 prepost=1 treatment=1)
	* Margin: .6305529 with P-value: 0.490

** Fixed Effects Model with Publication Counts
*  The outcome is tot_pub in levels (table note: "Outcome: Total Publications").
*  Panel is declared on researcher (pi_id) and panel year; each model regresses
*  tot_pub on treatment plus year dummies with the fe estimator. xtreg is run
*  without a vce() option. The first export recreates Robustness_FE.xls.
*  NOTE: this section relies on a local macro, so execute it as one unit.
xtset pi_id year
local fe_note Emerging Researchers, TBD, Adjusted R-Squared, TBD, FE Long Form; Outcome: Total Publications

* primesample, restricted to researchers who are not never-publishers
xtreg tot_pub treatment i.year if never_pub == 0 & primesample == 1, fe
outreg2 using "$dir/Robustness_FE.xls", replace dec(3) e(ll r2_a rmse) ///
	ctitle(CCA Ever Publish Sample) label addtext(`fe_note')

* primesample
xtreg tot_pub treatment i.year if primesample == 1, fe
outreg2 using "$dir/Robustness_FE.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(CCA Sample) label addtext(`fe_note')

* all researchers who are not never-publishers
xtreg tot_pub treatment i.year if never_pub == 0 , fe
outreg2 using "$dir/Robustness_FE.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Ever Publish Sample) label addtext(`fe_note')

* no sample restriction
xtreg tot_pub treatment i.year, fe
outreg2 using "$dir/Robustness_FE.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample) label addtext(`fe_note')

** Cluster by Year
	* Publications rise over time, which could bias researcher-clustered errors;
	* each sample is therefore re-estimated with (a) researcher clusters,
	* (b) year clusters and (c) two-way researcher-by-year clusters.
	* Per the original authors' note, vce2way cannot be combined with an if
	* qualifier, so the sample is restricted with preserve / keep / restore.
	* NOTE: this section relies on local macros, so execute it as one unit.
local rows Emerging Researchers, TBD, Adjusted R-Squared, TBD
local foot OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses

* Sample 1: ever-publish; sample 2: prior publications; sample 3: ever-publish, no prior publications
local rhs1 $dd $pi $dept $uni $fe
local rhs2 $dd $alt $dept $uni $fe
local rhs3 $dd $alt $dept $uni $fe
local smp1 never_pub == 0
local smp2 priorpubs_rest == 1
local smp3 priorpubs_rest == 0  & never_pub == 0
local col1 Full Sample, Total Pubs
local col2 Prior Publications, Total Pubs
local col3 No Prior Activity, Total Pubs

forvalues s = 1/3 {
	* (a) clustered on researcher; the very first export recreates the workbook
	*     and is the only column that carries the table note
	quietly reg tot_pub `rhs`s'' if `smp`s'', cluster(pi_id)
	if `s' == 1 {
		outreg2 using "$dir/Robustness_CSE.xls", replace dec(3) e(ll r2_a rmse) ///
			ctitle(`col`s'') label addtext(`rows', Clustered by, Researcher, Year Applied FE, Yes, `foot')
	}
	else {
		outreg2 using "$dir/Robustness_CSE.xls", append dec(3) e(ll r2_a rmse) ///
			ctitle(`col`s'') label addtext(`rows', Clustered by, Researcher, Year Applied FE, Yes)
	}

	* (b) clustered on panel year
	quietly reg tot_pub `rhs`s'' if `smp`s'', cluster(year)
	outreg2 using "$dir/Robustness_CSE.xls", append dec(3) e(ll r2_a rmse) ///
		ctitle(`col`s'') label addtext(`rows', Clustered by, Year, Year Applied FE, Yes)

	* (c) two-way clusters on the temporarily restricted data
	preserve
	keep if `smp`s''
	vce2way reg tot_pub `rhs`s'' , cluster(pi_id year)
	outreg2 using "$dir/Robustness_CSE.xls", append dec(3) e(ll r2_a rmse) ///
		ctitle(`col`s'') label addtext(`rows', Clustered by, Researcher and Year, Year Applied FE, Yes)
	restore
}
	
** Broader Impacts Sample Estimation
*  Primary DD model on observations with broaderimpacts == 1, for the same three
*  samples used elsewhere; the first export recreates BI_sample.xls and is the
*  only column that carries the table note.
*  NOTE: this section relies on local macros, so execute it as one unit.
local rows Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes
local foot OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses

* Ever-publish sample
quietly reg tot_pub $dd $pi $dept $uni $fe if never_pub == 0 & broaderimpacts == 1, cluster(pi_id)
outreg2 using "$dir/BI_sample.xls", replace dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample) label addtext(`rows', `foot')

* Researchers with prior publications
quietly reg tot_pub $dd $alt $dept $uni $fe if priorpubs_rest == 1 & broaderimpacts == 1, cluster(pi_id)
outreg2 using "$dir/BI_sample.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Prior Publications) label addtext(`rows')

* Ever-publish researchers without prior publications
quietly reg tot_pub $dd $alt $dept $uni $fe if priorpubs_rest == 0  & never_pub == 0 & broaderimpacts == 1, cluster(pi_id)
outreg2 using "$dir/BI_sample.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(No Prior Activity) label addtext(`rows')

** Outliers Restricted: Note, the two publication outliers (PI IDs 412 and 705) are already dropped for excessive prior publications

** Alternate Form of Prior Publications - Interacted with Treatment Variable
	* Alternate to stratification; produces the same results (authors' note)
	* NOTE: this section relies on local macros, so execute it as one unit.
* Interaction terms built from the prior-publication indicator priorpubs_rest
gen priortriple2 = award*prepost*priorpubs_rest
gen triple1 = award*priorpubs_rest
gen triple2 = prepost*priorpubs_rest
* NOTE(review): the models below reference priortriple, not priortriple2. If no
* variable named exactly priortriple exists, Stata resolves it as an abbreviation
* of priortriple2; otherwise priortriple2 is unused. Confirm which is intended.

* Table rows/notes. The OLS columns are clustered on pi_id. The xtreg, fe columns
* are estimated without a vce() option, so they are labelled as not clustered and
* carry the same FE note used for these models in Robustness_FE.xls (previously
* they were mislabelled as clustered OLS DD columns).
local ols_rows Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses
local fe_rows  Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, No, Year Applied FE, Yes, FE Long Form; Outcome: Total Publications
local fe_note  Emerging Researchers, TBD, Adjusted R-Squared, TBD, FE Long Form; Outcome: Total Publications

* OLS DD, ever-publish sample: triple interaction only, then with both
* two-way terms; the first export recreates priorinteract.xls
quietly reg tot_pub $dd priortriple $pi $dept $uni $fe if never_pub == 0 , cluster(pi_id)
outreg2 using "$dir/priorinteract.xls", replace dec(3) e(ll r2_a rmse) ///
	ctitle(Ever Publish Sample, Total Pubs) label addtext(`ols_rows')
quietly reg tot_pub priortriple triple1 triple2 $dd $pi $dept $uni $fe if never_pub == 0 , cluster(pi_id)
outreg2 using "$dir/priorinteract.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Ever Publish Sample, Total Pubs) label addtext(`ols_rows')

* Researcher fixed-effects versions on the full sample (uses the panel declared
* by xtset pi_id year earlier in this file). Each fit is appended to both
* priorinteract.xls and Robustness_FE.xls.
xtreg tot_pub priortriple i.year , fe
outreg2 using "$dir/priorinteract.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample, Total Pubs) label addtext(`fe_rows')
outreg2 using "$dir/Robustness_FE.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample) label addtext(`fe_note')
xtreg tot_pub treatment priortriple i.year , fe
outreg2 using "$dir/priorinteract.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample, Total Pubs) label addtext(`fe_rows')
outreg2 using "$dir/Robustness_FE.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample) label addtext(`fe_note')
xtreg tot_pub treatment priortriple triple2 i.year , fe
outreg2 using "$dir/priorinteract.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample, Total Pubs) label addtext(`fe_rows')
outreg2 using "$dir/Robustness_FE.xls", append dec(3) e(ll r2_a rmse) ///
	ctitle(Full Sample) label addtext(`fe_note')
}

********************************************************************************
**** Part 5: Long-Form Data Analysis - Additional Outcomes ****
********************************************************************************
{
*** Graphs: yearly mean first-author and top-tier publication counts
** Publishing sample: primesample == 1 and never_pub == 0. Each graph plots the
** by-year mean for award == 1 (legend: Awardees) against award == 0 (legend:
** Honorable Mentions), with a dashed reference line at panel year 6 (axis label "0").
*  NOTE: this section relies on local macros, so execute it as one unit.
local ever never_pub == 0 & primesample == 1
local cut  xline(6, lpattern(longdash) lcolor(gs13))
local axes xlabel(1 "-5" 6 "0" 11 "5" 16 "10") xtitle("Year") graphregion(fcolor(white))
local keys legend(label(1 "Awardees") label(2 "Honorable Mentions"))

* First Author Publications
bysort year: egen actfapub_a  = mean(FA_tot_pub) if award == 1 & `ever'
bysort year: egen actfapub_hm = mean(FA_tot_pub) if award == 0 & `ever'
twoway  (scatter actfapub_a year, mcolor(black) `cut') ///
		(scatter actfapub_hm year, mcolor(gs10) msymbol(S)), ///
		yscale(range(0 4)) ylabel(0(1)4) ytitle("First Author Publications") `axes' ///
		`keys' legend(subtitle("If Ever Publish"))
graph save Graph "${dir}/Graph_FA_Active.gph", replace

* Top 10% Journal Publications
bysort year: egen tenactpub_a  = mean(top10_tot_pub) if award == 1 & `ever'
bysort year: egen tenactpub_hm = mean(top10_tot_pub) if award == 0 & `ever'
twoway  (scatter tenactpub_a year, mcolor(black) `cut') ///
		(scatter tenactpub_hm year, mcolor(gs10) msymbol(S)), ///
		yscale(range(0 1.5)) ylabel(0(0.5)1.5) ytitle("Publications in the Top 10%") `axes' ///
		`keys' legend(subtitle("Panel A: If Ever Publish"))
graph save Graph "${dir}/Graph_Ten_Active.gph", replace

* Top 5% Journal Publications
bysort year: egen fiveactpub_a  = mean(top05_tot_pub) if award == 1 & `ever'
bysort year: egen fiveactpub_hm = mean(top05_tot_pub) if award == 0 & `ever'
twoway  (scatter fiveactpub_a year, mcolor(black) `cut') ///
		(scatter fiveactpub_hm year, mcolor(gs10) msymbol(S)), ///
		yscale(range(0 1)) ylabel(0(0.2)1) ytitle("Publications in the Top 5%") `axes' ///
		`keys' legend(subtitle("Panel B: If Ever Publish"))
graph save Graph "${dir}/Graph_Five_Active.gph", replace

* Top 1% Journal Publications
bysort year: egen oneactpub_a  = mean(top01_tot_pub) if award == 1 & `ever'
bysort year: egen oneactpub_hm = mean(top01_tot_pub) if award == 0 & `ever'
twoway  (scatter oneactpub_a year, mcolor(black) `cut') ///
		(scatter oneactpub_hm year, mcolor(gs10) msymbol(S)), ///
		yscale(range(0 0.5)) ylabel(0(0.1)0.5) ytitle("Publications in the Top 1%") `axes' ///
		`keys' legend(subtitle("Panel C: If Ever Publish"))
graph save Graph "${dir}/Graph_One_Active.gph", replace
	
*** Primary Model with Alternate Outcomes
** Setup: a constant-only regression whose export (column "Test Constant")
** recreates both output workbooks before the loop appends to them
*  NOTE: this section relies on local macros, so execute it as one unit.
reg FA_tot_pub 
outreg2 using "$dir/Altoutcomes.xls", replace dec(3) e(ll r2_a rmse) ///
	ctitle(Test Constant) addtext(OLS Long Form DD; Outcome: Total Publications; Clustered standard errors in parentheses)
outreg2 using "$dir/Altoutcomes_DDD.xls", replace dec(3) e(ll r2_a rmse) ctitle(Test Constant)

** Alternate Outcomes for Ever Publish Sample
*  For each outcome: the DD model (exported with and without the ci option) and
*  the triple-difference check; standard errors clustered on pi_id.
local rows Emerging Researchers, TBD, Adjusted R-Squared, TBD, Clustered by Researcher, Yes, Year Applied FE, Yes, Controls, Yes, DD with Discontinuity, NS
local ddd  triple award year prepost awardyear treatment yearpost
foreach depvar in FA_tot_pub top01_tot_pub top05_tot_pub top10_tot_pub {
	quietly reg `depvar' $dd $pi $dept $uni $fe if never_pub == 0, cluster(pi_id)
	outreg2 using "$dir/Altoutcomes.xls", append dec(3) ci e(ll r2_a rmse) ///
		ctitle(Full Sample, `depvar') label addtext(`rows')
	outreg2 using "$dir/Altoutcomes.xls", append dec(3) e(ll r2_a rmse) ///
		ctitle(Full Sample, `depvar') label addtext(`rows')
	reg `depvar' `ddd' $pi $dept $uni $fe if never_pub == 0, cluster(pi_id)
	outreg2 using "$dir/Altoutcomes_DDD.xls", append dec(3) e(ll r2_a rmse) ///
		ctitle(Full Sample, `depvar') label addtext(Cluster, Yes, Sample, Ever Publish, Controls, Yes)
}
}
********************************************************************************
*** The End ***
********************************************************************************
